library(Seurat)
library(SeuratObject)
library(SeuratDisk)
library(tidyverse)
library(patchwork)
library(harmony)
First give a try to integrate all the human wound and mouse wound data
# load the human and mouse converted gene symbols (made by myself)
hs_ms_genes <- data.table::fread("Private_human_mouse_GeneSymbol_conversion.txt")
# Day 3 after wounding (Dongqing Snhg26)
ms_seu <- readRDS("../s1-scData/01-our ms data/s2_miceD3data_onlyWT.rds")
## convert the mouse gene symbol to human gene symbol (orthology)
exp_mtx <- as.matrix(ms_seu@assays$RNA@counts);dim(exp_mtx)
v2genes <- data.frame(ms_gene = rownames(exp_mtx)) %>% left_join(., hs_ms_genes)
table(is.na(v2genes$hs_gene))
##lots of NAs for which there are no human genes matching
## Remove NAs
v2genes <- v2genes[!is.na(v2genes$hs_gene),,F]
## Filter the expression matrix for genes which a mouse counterpart is available
exp_mtx <- exp_mtx[v2genes$ms_gene,]
## Now change the rownames of the matrix to the mouse gene names
rownames(exp_mtx) <- v2genes$hs_gene
#check the duplicated gene names
duplicated(rownames(exp_mtx)) %>% table()
exp_mtx <- exp_mtx[!duplicated(rownames(exp_mtx)),]
dim(exp_mtx);identical(colnames(exp_mtx), colnames(ms_seu))
## Create the seurat object with mouse genes.
ms_seu <- CreateSeuratObject(counts = exp_mtx, meta.data = ms_seu@meta.data)
rownames(ms_seu)[grep("KRT6", rownames(ms_seu))]
rownames(ms_seu)[grep("MMP", rownames(ms_seu))]
saveRDS(ms_seu, file = "../s1-scData/01-our ms data/s2_miceD3data_onlyWT_humanGenes.rds")
# mouse wound with human gene symbol (own data)
ms_seu <- readRDS("../all_miceD3data_onlyWT_humanGenes.rds")
ms_seu$Condition <- gsub("Skin", "Unwound", ms_seu$Condition) # Change the name since it is the same as human wound
ms_seu$Condition <- gsub("Wound", "DPW3", ms_seu$Condition)
ms_seu <- subset(ms_seu, subset = Condition == "DPW3")
table(ms_seu$orig.ident)
##
## WTwound
## 4601
rownames(ms_seu)[grep("^IL", rownames(ms_seu))] %>% sort()
## [1] "IL10" "IL10RA" "IL10RB" "IL11" "IL11RA" "IL12A"
## [7] "IL12B" "IL12RB1" "IL12RB2" "IL13" "IL13RA1" "IL13RA2"
## [13] "IL15" "IL15RA" "IL16" "IL17B" "IL17D" "IL17F"
## [19] "IL17RA" "IL17RB" "IL17RC" "IL17RD" "IL17RE" "IL18"
## [25] "IL18BP" "IL18R1" "IL18RAP" "IL19" "IL1A" "IL1B"
## [31] "IL1F10" "IL1R1" "IL1R2" "IL1RAP" "IL1RAPL1" "IL1RL1"
## [37] "IL1RL2" "IL1RN" "IL2" "IL20" "IL20RA" "IL20RB"
## [43] "IL21R" "IL22RA1" "IL22RA2" "IL23A" "IL23R" "IL24"
## [49] "IL27" "IL27RA" "IL2RA" "IL2RB" "IL2RG" "IL31RA"
## [55] "IL33" "IL34" "IL3RA" "IL4I1" "IL4R" "IL5"
## [61] "IL6" "IL6R" "IL6ST" "IL7" "IL7R" "IL9R"
## [67] "ILDR1" "ILDR2" "ILF2" "ILF3" "ILK" "ILKAP"
## [73] "ILRUN" "ILVBL"
# Human skin wound healing
hs_seu <- readRDS("../all_human_wound_metadata.rds")
#hs_seu <- subset(hs_seu, subset = Condition == "Skin" | Condition == "Wound1")
hs_seu <- subset(hs_seu, subset = Condition == "Wound1")
hs_seu@meta.data <- droplevels(hs_seu@meta.data)
table(hs_seu$orig.ident)
##
## PWH26D1 PWH27D1 PWH28D1
## 4662 6300 4484
rownames(hs_seu)[grep("^IL", rownames(hs_seu))] %>% sort()
## [1] "IL10" "IL10RA" "IL10RB" "IL10RB-DT" "IL11"
## [6] "IL11RA" "IL12A" "IL12A-AS1" "IL12B" "IL12RB1"
## [11] "IL12RB2" "IL13" "IL13RA1" "IL13RA2" "IL15"
## [16] "IL15RA" "IL16" "IL17A" "IL17B" "IL17D"
## [21] "IL17RA" "IL17RB" "IL17RC" "IL17RD" "IL17RE"
## [26] "IL18" "IL18BP" "IL18R1" "IL18RAP" "IL19"
## [31] "IL1A" "IL1B" "IL1F10" "IL1R1" "IL1R2"
## [36] "IL1RAP" "IL1RAPL1" "IL1RAPL2" "IL1RL1" "IL1RL2"
## [41] "IL1RN" "IL2" "IL20" "IL20RA" "IL20RB"
## [46] "IL20RB-AS1" "IL21R" "IL21R-AS1" "IL22" "IL22RA1"
## [51] "IL22RA2" "IL23A" "IL23R" "IL24" "IL25"
## [56] "IL26" "IL27" "IL27RA" "IL2RA" "IL2RB"
## [61] "IL2RG" "IL31RA" "IL32" "IL33" "IL34"
## [66] "IL36B" "IL36G" "IL36RN" "IL37" "IL3RA"
## [71] "IL4" "IL4I1" "IL4R" "IL5" "IL5RA"
## [76] "IL6" "IL6-AS1" "IL6R" "IL6R-AS1" "IL6ST"
## [81] "IL7" "IL7R" "IL9R" "ILDR1" "ILDR2"
## [86] "ILF2" "ILF3" "ILF3-DT" "ILK" "ILKAP"
## [91] "ILRUN" "ILRUN-AS1" "ILVBL" "ILVBL-AS1"
# check the overlapped genes and keep the overlapped genes
overgene <- intersect(rownames(ms_seu), rownames(hs_seu))
length(overgene)
## [1] 14934
ms_seu <- ms_seu[overgene, ]
hs_seu <- hs_seu[overgene, ]
# integrate based on orig.ident names
ms_seu$Species="Mouse"
hs_seu$Species="Human"
all_seu <- merge(hs_seu, y = c(ms_seu),
project = "wounds")
# sampling the cell numbers
lowNum <- min(table(all_seu$Species))
metadata <- all_seu@meta.data %>% rownames_to_column(var = "barcode")
to_keep <- metadata %>% group_by(Species) %>% sample_n(size = lowNum, replace = FALSE) %>%
pull(barcode)
all_seu <- subset(all_seu, cells = to_keep)
table(all_seu$Condition)
##
## DPW3 Wound1
## 4601 4601
alldata = SplitObject(all_seu, split.by = "orig.ident")
rm(all_seu)
# normalize and identify variable features for each dataset independently
alldata <- lapply(X = alldata, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 4000)
})
# Run CCA using defaults
features <- SelectIntegrationFeatures(object.list = alldata, nfeatures = 3000)
anno <- readRDS("../gene_conversion_hs_ms/humanAnnotation.rds")
features <- features %>% as.data.frame() %>% setNames("gene") %>% left_join(., anno[, c(1,3)], by=c("gene"="external_gene_name")) %>%
filter(gene_biotype == "protein_coding" | gene_biotype == "lncRNA") %>% distinct(gene) %>% pull(gene)
rn <- lapply(alldata, rownames)
nE <- colSums(Reduce(rbind, lapply(rn, function(x) features %in% x)))
features <- features[nE == length(alldata)] #Keep only variable genes expressed in all samples
length(features)
## [1] 2990
alldata.anchors <- FindIntegrationAnchors(object.list = alldata, anchor.features = features)
# this command creates an 'integrated' data assay
inteData <- IntegrateData(anchorset = alldata.anchors)
# specify that we will perform downstream analysis on the corrected data note that the
# original unmodified data still resides in the 'RNA' assay
DefaultAssay(inteData) <- "integrated"
VariableFeatures(inteData) <- features
# Run the standard workflow for visualization and clustering
inteData <- ScaleData(inteData, verbose = FALSE)
inteData <- RunPCA(inteData, npcs = 30, verbose = FALSE)
inteData <- RunUMAP(inteData, dims = 1:30, assay = "integrated", reduction = "pca", n.neighbors = 30,
min.dist = 0.5, n.epochs = 500, spread = 1, learning.rate = 1)
DimPlot(inteData, group.by = "orig.ident") + NoAxes()
inteData = FindNeighbors(inteData, dims = 1:30, reduction = "pca", k.param = 30)
inteData = FindClusters(inteData, resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 9202
## Number of edges: 487989
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9446
## Number of communities: 18
## Elapsed time: 1 seconds
inteData = FindClusters(inteData, resolution = 1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 9202
## Number of edges: 487989
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8992
## Number of communities: 29
## Elapsed time: 1 seconds
inteData = FindClusters(inteData, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 9202
## Number of edges: 487989
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9287
## Number of communities: 24
## Elapsed time: 1 seconds
inteData = FindClusters(inteData, resolution = 0.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 9202
## Number of edges: 487989
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9102
## Number of communities: 27
## Elapsed time: 1 seconds
#inteData$Condition <- factor(inteData$Condition, levels = c("Skin", "Wound1", "Unwound", "DPW3"))
inteData$Condition <- factor(inteData$Condition, levels = c("Wound1", "DPW3"))
DefaultAssay(inteData) <- "RNA"
inteData <- NormalizeData(inteData)
DimPlot(inteData, group.by = "Species", cols = c("#d8b365", "#5ab4ac")) + ggtitle("Cross species integration")
DimPlot(inteData, group.by = "Species", split.by = "Species", cols = c("#d8b365", "#5ab4ac")) + ggtitle("Cross species integration")
DimPlot(inteData, group.by = "CellType", label = T, split.by = "Species") + ggtitle("")
DimPlot(inteData, group.by = "CellType", label = T, split.by = "Condition", ncol = 4) + ggtitle("") + NoLegend()
DimPlot(inteData, group.by = "integrated_snn_res.0.3", label = T, split.by = "Condition")
DimPlot(inteData, group.by = "integrated_snn_res.0.5", label = T, split.by = "Condition")
DimPlot(inteData, group.by = "integrated_snn_res.0.8", label = T, split.by = "Condition")
DimPlot(inteData, group.by = "integrated_snn_res.1", label = T, split.by = "Condition")
saveRDS(inteData, "all_HsMs_CCA_W1_DPW3_sampling_noSkin.rds")
inteData <- readRDS("all_HsMs_CCA_W1_DPW3_noSkin.rds")
DimPlot(inteData, group.by = "CellType", label = T, split.by = "Condition", ncol = 4) + ggtitle("") + NoLegend()
table(inteData$CellType, inteData$integrated_snn_res.0.3)
###############################################
#-- Cell proportion analysis across species --#
## Step 1. calculate the cell types number per sample, add the location information,
## total numbers, as well the condition information
metadata <- inteData@meta.data
cl_species <- table(metadata$Condition, metadata$Species) %>% as.data.frame() %>% dplyr::filter(Freq > 0)
sm_ct_nu <- table(metadata$Condition, metadata$integrated_snn_res.0.3) %>% as.data.frame() %>%
setNames(c("Sample", "CellCluster", "Num")) %>%
left_join(., cl_species, by = c("Sample" = "Var1")) %>% mutate(prop = Num / Freq)
## step 2. calculate the total normalized proportions of each cell type per condition
df.group <- sm_ct_nu %>% group_by(CellCluster, Var2) %>% summarise(Freq2=sum(prop)) %>%
ungroup() %>% group_by(CellCluster) %>% mutate(Freq_new = Freq2/sum(Freq2), lbl = scales::percent(Freq_new)) %>% ungroup()
ggplot(data=df.group, aes(x=" ", y=Freq_new, group=Var2, colour=Var2, fill=Var2)) +
geom_bar(width = 1, stat = "identity") +
scale_fill_manual(values = c("#c45027", "#00afbb")) +
scale_color_manual(values = c("#c45027", "#00afbb")) +
coord_polar("y", start=0) +
facet_grid(CellCluster ~ .) + theme_void()
pdf("PieChart_res0.05.pdf", useDingbats = FALSE, width = 3, height = 6)
dev.off()
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS: /sw/apps/R/4.1.1/rackham/lib64/R/lib/libRblas.so
## LAPACK: /sw/apps/R/4.1.1/rackham/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] harmony_0.1.0 Rcpp_1.0.7 patchwork_1.1.1
## [4] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
## [7] purrr_0.3.4 readr_2.0.2 tidyr_1.1.4
## [10] tibble_3.1.5 ggplot2_3.3.5 tidyverse_1.3.1
## [13] SeuratDisk_0.0.0.9019 SeuratObject_4.0.2 Seurat_4.0.5
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.3.0 plyr_1.8.6
## [4] igraph_1.2.7 lazyeval_0.2.2 splines_4.1.1
## [7] listenv_0.8.0 scattermore_0.7 digest_0.6.28
## [10] htmltools_0.5.2 fansi_0.5.0 magrittr_2.0.1
## [13] tensor_1.5 cluster_2.1.2 ROCR_1.0-11
## [16] tzdb_0.1.2 globals_0.14.0 modelr_0.1.8
## [19] matrixStats_0.61.0 spatstat.sparse_2.0-0 colorspace_2.0-2
## [22] rvest_1.0.2 ggrepel_0.9.1 haven_2.4.3
## [25] xfun_0.28 crayon_1.4.2 jsonlite_1.7.2
## [28] spatstat.data_2.1-0 survival_3.2-13 zoo_1.8-9
## [31] glue_1.5.0 polyclip_1.10-0 gtable_0.3.0
## [34] leiden_0.3.9 future.apply_1.8.1 abind_1.4-5
## [37] scales_1.1.1 DBI_1.1.1 miniUI_0.1.1.1
## [40] viridisLite_0.4.0 xtable_1.8-4 reticulate_1.22
## [43] spatstat.core_2.3-1 bit_4.0.4 htmlwidgets_1.5.4
## [46] httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.2
## [49] ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3
## [52] sass_0.4.0 uwot_0.1.10 dbplyr_2.1.1
## [55] deldir_1.0-6 utf8_1.2.2 labeling_0.4.2
## [58] tidyselect_1.1.1 rlang_0.4.12 reshape2_1.4.4
## [61] later_1.3.0 munsell_0.5.0 cellranger_1.1.0
## [64] tools_4.1.1 cli_3.1.0 generics_0.1.1
## [67] broom_0.7.10 ggridges_0.5.3 evaluate_0.14
## [70] fastmap_1.1.0 yaml_2.2.1 goftest_1.2-3
## [73] knitr_1.36 bit64_4.0.5 fs_1.5.0
## [76] fitdistrplus_1.1-6 RANN_2.6.1 pbapply_1.5-0
## [79] future_1.23.0 nlme_3.1-153 mime_0.12
## [82] xml2_1.3.2 hdf5r_1.3.4 compiler_4.1.1
## [85] rstudioapi_0.13 plotly_4.10.0 png_0.1-7
## [88] spatstat.utils_2.2-0 reprex_2.0.1 bslib_0.3.1
## [91] stringi_1.7.5 highr_0.9 RSpectra_0.16-0
## [94] lattice_0.20-45 Matrix_1.3-4 vctrs_0.3.8
## [97] pillar_1.6.4 lifecycle_1.0.1 spatstat.geom_2.3-0
## [100] lmtest_0.9-39 jquerylib_0.1.4 RcppAnnoy_0.0.19
## [103] data.table_1.14.2 cowplot_1.1.1 irlba_2.3.3
## [106] httpuv_1.6.3 R6_2.5.1 promises_1.2.0.1
## [109] KernSmooth_2.23-20 gridExtra_2.3 parallelly_1.28.1
## [112] codetools_0.2-18 MASS_7.3-54 assertthat_0.2.1
## [115] withr_2.4.2 sctransform_0.3.2 mgcv_1.8-38
## [118] parallel_4.1.1 hms_1.1.1 grid_4.1.1
## [121] rpart_4.1-15 rmarkdown_2.11 Rtsne_0.15
## [124] shiny_1.7.1 lubridate_1.8.0